; QRZ! Ham Radio 4
; QRZ Ham Radio Callsign Database - Volume 4.iso
; (archive viewer navigation links: "< prev", "next >")
; Assembly Source File
; 372 lines
; Listing control for the assembler: page layout of the listing file and
; listing options. NOTE(review): nomd/mex presumably mean "do not list macro
; definitions" / "list macro expansions" -- confirm in the assembler manual.
page 132,60,1,1
opt nomd,mex
;
; Motorola Austin DSP Operation June 30,1988
;
; Port to Memory FFT - 1024 point
; File name: F-56.asm
;
; Benchmark figures quoted by the original authors:
; Maximum sample rate: 1.958 ms at 20.5 MHz / 1.487 ms at 27.0 MHz
;   (i.e. the time for one transform: 20072 instruction cycles multiplied
;   by the instruction cycle time given below)
; Memory Size: Prog: 254 words ; Data: 7170 words
; Number of clock cycles: 40144 (20072 instruction cycles)
; Clock Frequency: 20.5MHz/27.0MHz
; Instruction cycle time: 97.5ns / 74.1ns
;
fftreald macro points,data,odata,coef,ptr1,ptr2
fftreald ident 1,0
;
; fftreald -- Radix 2 Decimation in Time In-Place Fast Fourier Transform
; Routine for real input data.
;
; The macro expands to a processing loop that never returns: it polls the
; input pointer r7 until an input block is complete, swaps the two block
; pointers, transforms the completed block and jumps back to the polling
; code at strt.
;
; Input:  real data - normally ordered
;         Real data in Y memory, 2 buffers: one being filled, the other one
;         being processed
; Output: complex data - normally ordered
;         Real data in X memory
;         Imaginary data in Y memory
; Coefficient lookup table
;         -Cosine value in X memory
;         -Sine value in Y memory
;
; Macro Call - fftreald points,data,odata,coef,ptr1,ptr2
;
;   points  number of points (2-32768, power of 2)
;   data    start of data buffer
;   odata   output data buffer
;   coef    start of sine/cosine table
;   ptr1    X memory location of pointer to input data block 1
;   ptr2    X memory location of pointer to input data block 2
;
; Alters Data ALU Registers
;   x1 x0 y1 y0
;   a2 a1 a0 a
;   b2 b1 b0 b
;
; Alters Address Registers
;   r0 n0 m0
;   r1 n1 m1
;   r2 n2 m2
;   r3 n3 m3
;   r4 n4 m4
;   r5 n5 m5
;   r6 n6 m6
;   r7 n7 m7
;
; Alters Program Control Registers
;   pc sr
;
; Uses 8 locations on System Stack
; Base address of the scratch data area used by the intermediate complex FFT
; passes (loaded into r4 and r0 as #_intdata further down in the macro).
_intdata equ $0 ;internal data space at 0
;Check r7 to see if input buffer is filled
; r7 is the input pointer advanced by the data-collection instruction
; (movep ...,y:(r7)+). The loop below spins until the value derived from r7
; and the buffer length equals the block base address stored at x:ptr1.
; NOTE(review): b is loaded with #points at strt only, but jne branches to
; loop, which is below that load -- from the second iteration on b still
; holds the previous difference. Confirm whether the branch target should
; be strt.
; NOTE(review): sub a,b leaves (points - r7) in b, while the comment on that
; line describes (r7 - points). Confirm the intended operand order.
strt move #points,b ;b = input buffer length
loop move r7,a ;a = current input data pointer
sub a,b ;subtract buffer length from current input location
move x:ptr1,a ;a = input data base address read from x:ptr1
cmp a,b ;see if equal
jne loop ;if not, keep polling
; when ready, swap pointers of buffer to be loaded and buffer to be processed
; The two block pointers kept in X memory at ptr1 and ptr2 are exchanged
; through the accumulators; the FFT code that follows reads the block to
; process from x:ptr2.
move x:ptr1,a ;a = old ptr1
move x:ptr2,b ;b = old ptr2
move b,x:ptr1 ;ptr1 <- old ptr2
move a,x:ptr2 ;ptr2 <- old ptr1
; main fft routine
;
; Pointer set-up for the combined first and second passes.
;   r2 = base of the block to process (read from x:ptr2), then advanced by 4
;        to serve as the external input pointer of the later complex passes
;   r0 = input pointer, stepping by points/4 under modulo-points addressing
;   r4, r5, r1 = output pointers, taken from r0 before, after one and after
;        two steps of points/4
;
; m2 is forced to linear addressing first: further down the macro reuses m2
; as its "butterflies per group" variable and leaves that value in it when
; it jumps back to strt, so without this the (r2)+n2 update below would run
; with a stale modifier on every frame after the first.
        move #-1,m2 ;linear addressing for external input pointer r2
        move x:ptr2,r2 ;initialize input pointers
        move #4,n2 ;initial offset for r2
        move r2,r0 ;input pointer for real passes
        move (r2)+n2 ;update external input pointer for complex passes
        move #points/4,n0 ;initialize input and output offsets
        move #points-1,m0 ;initialize address modifiers for modulo N
        move r0,r4 ;set up butterfly pointers in and out
        move (r0)+n0 ;step to second quarter
        move r0,r5 ;r5 = output pointer for br'
        move (r0)+n0 ;step to third quarter
        move r0,r1 ;r1 = output pointer for cr'/ci'
        move r4,r0 ;r0 back to start of block
        move m0,m1 ;modulo N for remaining pointers in and out
        move m0,m4 ;
        move m0,m5 ;
; Do first and second Radix 2 FFT passes: all have real input. First and second
; passes are combined using four-point butterflies.
; The five instructions before the DO prime the software pipeline: they fetch
; ar, br, cr, dr of the first butterfly and form (ar+cr) and (ar-cr).
        move y:(r0)+n0,a ;get ar
        move y:(r0)+n0,y1 ;get br
        move y:(r0)+n0,b ;get cr
        add a,b y:(r0)+n0,y0 ;(ar+cr),get dr
        subl b,a ;cr'=(ar-cr)
        do n0,_twopass ;do all four point butterflies
        tfr y0,a a,x:(r1) ;get dr,save cr'
        sub y1,a (r0)+ ;ci'=(dr-br)
        tfr y1,a a,y:(r1)+ ;get br,save ci'
        add y0,a y:(r0)+n0,x1 ;(br+dr),get ar
        add b,a y:(r0)+n0,y1 ;ar'=(ar+cr)+(br+dr),get br
        subl a,b a,y:(r4)+ ;br'=(ar+cr)-(br+dr),save ar'
        tfr x1,a b,x0 y:(r0)+n0,b ;get ar,move br',get cr
        add a,b y:(r0)+n0,y0 ;(ar+cr),get dr
        subl b,a x0,x:(r5)+ ;cr'=(ar-cr),save br'
_twopass ;end of four-point butterfly loop
; Do next real-input FFT (RFFT) passes. Each RFFT butterfly is a four-point in,
; 3-point out. The fourth point is not computed since it is later obtained by
; using the conjugate symmetry property of the RFFT.
;
; n5 holds the quarter spacing for the current pass and is halved at the end
; of every pass. Per pass the pointers are rebuilt from #data:
;   r0 = data          (ar in / ar' out, Y memory; br is read at r0+n0)
;   r4 = data + n5     (br' out, X memory)
;   r1 = data + 2*n5   (ci' out, Y memory)
;   r5 = data + 3*n5   (dr in, X memory)
; NOTE(review): these passes address the buffer through the fixed #data
; argument, while the first two passes start from the pointer read from
; x:ptr2 -- confirm the two agree when ptr2 holds the second block address.
        move #points/8,n5 ;spacing, for 1024 spacing=128
        do #@cvi(@log(points)/@log(2)-2.5),_next ;7 passes for 1024 pts
        move #data,r5 ;point to data
        move n5,n0 ;same offset
        move r5,r0 ;ar pointer
        move (r5)+n5 ;+1/4
        move r5,r4 ;br pointer
        move (r5)+n5 ;+1/2
        move r5,r1 ;ci pointer
        move (r5)+n5 ;+3/4
        move y:(r0)+n0,a ;get ar
        move y:(r0)-n0,b ;get br
        add a,b ;ar'=(ar+br)
        do n0,_nextpass ;do for all p
        subl b,a x:(r5)+,b b,y:(r0)+ ;br'=(ar-br),get dr,save ar'
        neg b a,x:(r4)+ y:(r0)+n0,a ;ci'=-dr,save br',get ar
        move b,x0 y:(r0)-n0,b ;move ci',get br
        add a,b x0,y:(r1)+ ;ar'=(ar+br),save ci'
_nextpass ;end of butterfly loop for this pass
        move n5,a ;get bflys/pass
        lsr a ;/2
        move a1,n5 ;put back
_next ;end of RFFT pass loop
; special RFFT pass: real input, (4-point). Complex output: stored in normal
; order, 4-th output stored as complex conjugate of 3rd output.
;
; Reads four consecutive locations starting at #data (ar and br from Y memory,
; cr and dr from X memory) and writes four output points through r4, which
; starts at #odata and steps by points/2 with m4 = 0 (bit-reversed
; addressing, per the original comment).
move #data,r0 ;input pointer
move #odata,r4 ;output pointer
move #points/2,n4 ;output pointer offset
move #0,m4 ;bit reverse output
move y:(r0)+,a ;get ar
move y:(r0)+,b ;get br
add a,b x:(r0)+,x0 ;ar'=ar+br, get cr
move b,x:(r4)+n4 ;save ar' (1st output, X memory)
subl b,a x:(r0),b ;br'=ar-br, get dr
neg b b,y0 a,x:(r4)+n4 ;ci'=-dr, keep dr in y0, save br' (2nd output, X memory)
move x0,x:(r4) ;3rd output: real part cr'
move b,y:(r4)+n4 ;3rd output: imaginary part ci'
move x0,x:(r4) ;4th output: real part cr' again
move y0,y:(r4)+n4 ;4th output: imaginary part dr (= -ci'), i.e. conjugate of 3rd
; do first 2-point complex fft with conjugate storage
;
; One radix-2 butterfly on the complex points at r0 (input a) and r1 = r0+1
; (input b), with the twiddle factor read through r6. Results are written
; through r4 and r5 (outputs a and b) and, with the imaginary part negated,
; through the conjugate pointer r3. The address that r4 must take for the
; next FFT is pushed on the system stack (ssh) and popped by the code that
; follows this section.
;
; n3 is loaded with #coef before lua (r3)+n3,r6 uses it: nothing earlier in
; the macro loads n3, so on the first pass through the macro the twiddle
; pointer would otherwise be formed from whatever n3 held on entry. (On
; later passes n3 still holds #coef from the pointer update at the end of
; the complex FFT loop.) n3 is overwritten again a few lines further down,
; so the extra load has no other effect.
; initialization
        move r2,r0 ;r0 points to external data
        move #-1,m2 ;linear addr. for external input data pointer
        move #4,n2 ;offset for external input data pointer
        move #points/8,r3 ;coefficient base offset -->r3
        move #coef,n3 ;coefficient base address for the twiddle pointer below
        move (r2)+n2 ;update external input data pointer
        lua (r0)+,r1 ;initialize input pointer b
        lua (r3)+n3,r6 ;initialize twiddle factor pointer
        move #points/4,n4 ;offset for output counter a
        move r4,n3 ;initialization of conjugate pointer
        move #odata+points,r3 ;
        move r4,r5 ;initialize output pointer b (r5 is reloaded by the lua below)
        move (r3)-n3 ;initialize conjugate pointer
        move #odata,n3 ;
        lua (r4)+n4,r5 ;initialize output pointer b
        move (r3)+n3 ;initialize conjugate pointer
        move n4,n5 ;initialize offset for output pointer b
        move #0,m4 ;bit-reversed addressing for output ptr a
        move (r5)+n5 ;initialize output pointer b
        move #0,m5 ;bit-reversed addressing for output ptr b
        move #0,m3 ;bit-reversed addressing for conjug. ptr.
        move #points/2,n3 ;offset for conjugate pointer
        move y:(r0),b ;initialize butterfly
        move (r3)+n3 ;future output pointer a
        move r3,ssh ;save future output pointer a -->stack
        move (r3)-n3 ;reinit. conjugate pointer
; butterfly with conjugate storage
        move x:(r1),x1 y:(r6),y0 ;real input b, twiddle value from Y table
        mac x1,y0,b x:(r6),x0 y:(r1),y1 ;twiddle value from X table, imag. input b
        macr -x0,y1,b y:(r0),a ;imag. output a in b, reload imag. input a
        neg b b,y:(r4) ;save imag. output a, then negate it
        move b,y:(r3)-n3 ;save negated imag. part at conjugate location
        addl b,a x:(r0),b ;imag. output b in a, get real input a
        neg a a,y:(r5) ;save imag. output b, then negate it
        move a,y:(r3)+n3 ;save negated imag. part at conjugate location
        mac -x1,x0,b x:(r0),a ;real part accumulation, reload real input a
        macr -y1,y0,b ;real output a in b
        subl b,a b,x:(r4)+n4 ;real output b in a, save real output a
        move b,x:(r3)-n3 ;save real part at conjugate location
        move a,x:(r5)+n5 ;save real output b
        move a,x:(r3)-n3 ;save real part at conjugate location
; end of butterfly
; initialize pointers for complex fft's
;
; Loop nest for the remaining complex FFTs:
;   _end_fft   one iteration per complex FFT
;   _end_pass    all passes of that FFT except the last (n4 iterations)
;   _end_grp       groups of the pass (n2 iterations)
;   _end_bfy         butterflies of the group except its last one (n7)
;   _lastpass    last pass, one butterfly per group, output written to the
;                external buffer together with its conjugate image
; Values carried on the system stack through ssh: the output address for the
; current FFT, the pass count (n4) and the output address for the next FFT.
; m2 is used as a plain variable holding the butterflies per group of the
; first pass of the current FFT; it is doubled at the end of every FFT.
; NOTE(review): n6 (twiddle pointer step) is not loaded anywhere in this
; macro, and m6 is first written in the pointer update at the bottom of the
; loop -- confirm they are set up by the caller before the first pass.
        move #coef,n3 ;initialize coefficient base
        move #-1,m3
        move m3,m4 ;output pointer a has linear addr.
        move m3,m5 ;output pointer b has linear addr.
        move ssh,r4 ;initialize next external output pointer a position
        move #2,m2 ;initialize butterflies per group
        move #1,n4 ;initialize number of passes-1 per FFT
; do all the complex fft's that are necessary (up to N/4-point)
        do #@cvi(@log(points)/@log(2)-2.5),_end_fft ;7 for 1024 pt (4- pt....256- pt)
; initialize pointers in each fft
        move r4,ssh ;push output data address onto stack
        move r2,r0 ;get external data input address for first pass
        move #points/8,r3 ;update coefficient offset
        move m2,n1 ;initialize butterflies per group
        move #1,n2 ;initialize groups per pass
; complex fft passes are triple nested do-loops, with last pass split out
        do n4,_end_pass ;do all passes but last in this fft
; initialize pointers in each pass
; With n1 = butterflies per group: n7 = n1-1 (inner loop count), B pointers
; start n1 locations after the A pointers, and n0 = n4 = n5 = n1+1 is the
; step applied by the last butterfly of each group.
        move n4,ssh ;put number of passes-1 in FFT on stack
        move #_intdata,r4 ;initialize A output pointer
        move n1,r5
        move n1,n0 ;initialize pointer offsets
        lua (r5)-,n7 ;n7 = n1-1
        move n1,n4
        move n1,r6
        lua (r0)+n0,r1 ;initialize B input pointer
        lua (r4)+n4,r5 ;initialize B output pointer
        lua (r6)+,n4 ;n4 = n1+1
        move n4,n5
        lua (r3)+n3,r6 ;initialize W input pointer
        move n4,n0
; initialize butterfly input
        move x:(r1),x1 y:(r6),y0 ;lookup -sine value
        move y:(r0),b ;imag. input a
        mac x1,y0,b x:(r6)+n6,x0 y:(r1)+,y1 ;cos., imag. input b
        macr -x0,y1,b y:(r0),a ;
; butterflies
        do n2,_end_grp ;do for all groups
        do n7,_end_bfy ;do every butterfly in this group
        subl b,a x:(r0),b b,y:(r4)
        mac -x1,x0,b x:(r0)+,a a,y:(r5)
        macr -y1,y0,b x:(r1),x1
        subl b,a b,x:(r4)+ y:(r0),b
        mac x1,y0,b y:(r1)+,y1 ;Radix 2 DIT butterfly kernel
        macr -x0,y1,b a,x:(r5)+ y:(r0),a ;with constant twiddle factor
_end_bfy ;end of butterflies sharing one twiddle factor
; last butterfly of the group: same kernel, but the pointers step on to the
; next group and the next twiddle factor is fetched
        move (r1)+n1
        subl b,a x:(r0),b b,y:(r4)
        mac -x1,x0,b x:(r0)+n0,a a,y:(r5)
        macr -y1,y0,b x:(r1),x1 y:(r6),y0 ;lookup -sine value
        subl b,a b,x:(r4)+n4 y:(r0),b
        mac x1,y0,b x:(r6)+n6,x0 y:(r1)+,y1
        macr -x0,y1,b a,x:(r5)+n5 y:(r0),a ;with constant twiddle factor
_end_grp ;end of group loop
; per-pass update: halve butterflies per group (n1), double groups per pass
; (n2), halve coefficient offset (r3), restore pass count from the stack
        move n1,b1
        lsr b n2,a1 ;b1 = butterflies per group / 2, fetch groups per pass
        lsl a b1,n1 ;a1 = groups per pass * 2, store halved butterflies per group
        move r3,b1
        move ssh,n4 ;get number of passes-1 back from stack
        lsr b a1,n2 ;b1 = coefficient offset / 2, store doubled groups per pass
        move b1,r3
        move #_intdata,r0 ;intermediate passes use internal input data
_end_pass ;end of pass loop (all passes but the last)
; Do last FFT pass and move output data off-chip to external data memory.
; The output data is stored in normal order. At the same time, data is stored for
; the next output block using conjugate properties and a "reverse counter"
; initialize pointers
        move n7,r1
        move ssh,r4 ;pop output data address pushed at the top of this fft
        move (r1)+
        move n4,ssh ;put #passes-1 in this fft back on stack
        move r1,n0
        move r1,n1 ;correct pointer offset for last pass
        move r1,n4
        move r1,n5
        move #points/4,n4 ;offset for output pointer A
        lua (r0)+,r1 ;initialize B input pointer
        lua (r4)+n4,r5 ;initialize B output pointer, first step
        move n4,n5 ;offset for output pointer B
        lua (r3)+n3,r6 ;initialize W input pointer
        move (r5)+n5 ;initialize B output pointer, second step
        move #0,m4 ;bit-reversed addressing for output pointer A
        move r4,n3 ;initialization of conjugate pointer
        move #odata+points,r3
; initialize butterfly
        move y:(r0),b ;initialization of first butterfly
        move (r3)-n3 ;initialization of conjugate pointer
        move #odata,n3 ;
        move x:(r1),x1 y:(r6),y0 ;initialization of first butterfly
        move (r3)+n3 ;initialization of conjugate pointer
        move #0,m3 ;bit-reversed addressing for conjugate ptr
        move #points/2,n3 ;correct offset for conjugate pointer
        move m4,m5 ;bit-reversed addressing for output pointer B
        move (r3)+n3
        move r3,ssh ;put next output ptr a initialization on stack
        move (r3)-n3 ;reset conjugate pointer
; final butterfly with conjugate reverse storage of next block
        do n2,_lastpass
        mac x1,y0,b x:(r6)+n6,x0 y:(r1)+n1,y1 ;Radix 2 DIT butterfly kernel
        macr -x0,y1,b y:(r0),a ;with one butterfly per group
        neg b b,y:(r4) ;and changing twiddle factor
        move b,y:(r3)-n3 ;with conjugate storage
        addl b,a x:(r0),b
        neg a a,y:(r5)
        move a,y:(r3)+n3
        mac -x1,x0,b x:(r0)+n0,a
        macr -y1,y0,b x:(r1),x1 y:(r6),y0
        move b,x:(r4)+n4
        subl b,a b,x:(r3)-n3
        move a,x:(r5)+n5 y:(r0),b
        move a,x:(r3)-n3
_lastpass ;end of last-pass butterfly loop
; update pointers
        move ssh,r4 ;get updated output ptr a from stck for next fft
        move #coef,n3 ;n3 points to coeff. base address again
        move #-1,m3 ;linear addressing for r3 again
        move n6,n2 ;get fft data input offset (n2 is reloaded from a1 below)
        move m2,a ;initial data offset-->a
        move ssh,r6 ;get #passes in FFT-1 back from stack
        lsl a ;initial data offset * 2 -->a
        move #-1,m6 ;r6 increments linearly in next instruction
        move #-1,m2 ;external data pointer uses linear addressing
        lua (r6)+,n4 ;increment #passes-1 -->n4
        move #0,m6 ;r6 increments bit-reversed again
        move a1,r6 ;new initial data offset-->r6
        lsl a ;2*#points in this fft -->a
        move a1,n2 ;offset for new external input data
        move m0,m4 ;initialize output pointers again for modulo addr.
        move m0,m5 ;
        move (r2)+n2 ;point to next complex fft input data block
        move r6,m2 ;new initial data offset for next FFT-->m2
_end_fft ;end of loop over the complex fft's
; when fft is finished, jump back to see if data collection for next fft is completed
        jmp strt
; close the fftreald macro definition, so that the code after it is
; assembled as ordinary source and not swallowed into the macro body
        endm
; Data collection: a single instruction placed at program address $8 copies
; one word from y:$ffff to the location addressed by r7 and post-increments
; r7. NOTE(review): p:$8 is presumably an interrupt vector slot of the target
; device and y:$ffff the external input port -- confirm both against the
; hardware, and confirm where that interrupt is enabled (not visible here).
org p:$8
movep y:$ffff,y:(r7)+ ;data collection upon interrupt
; Main program at p:$100: store the two input block addresses in the pointer
; locations handed to the macro as ptr1 (x:3328) and ptr2 (x:3329), then
; expand the FFT macro, which loops forever.
; The blocks are 1024 words apart (256 and 1280 = 256 + 1024).
; NOTE(review): r7 itself is not loaded anywhere in this file, and m7 = 2047
; selects a 2048-word modulo range while the two blocks span 256..2303 --
; confirm the intended buffer placement and the initial value of r7.
org p:$100
move #256,a ;address of data block 1
move a,x:3328 ;ptr1 <- 256
move #1280,a ;address of data block 2
move a,x:3329 ;ptr2 <- 1280
move #2047,m7 ;set r7 for modulo addressing
; call fft macro:
;   points=1024, data=256, odata=2304, coef=3330, ptr1=3328, ptr2=3329
fftreald 1024,256,2304,3330,3328,3329